#### "How do past repression and indoctrination affect redistributive preferences?" ####
# authors: "Pelke, Lars"
# date: 2019-10-23
# written under "R version 3.6.0 (2019-03-11)"

#### Libraries ####

library(countrycode)
library(tidyverse)
library(viridis)
library(readstata13)
library(lme4)
library(performance)
library(sjPlot)
require(Amelia)


#### Preliminaries ####

# Show which R version is running (the header notes the version the script
# was written under).
R.version[["version.string"]]

# Remove all (non-hidden) objects from the current environment.
# Packages attached above stay attached.
rm(list = ls())

# No working directory is set here: the relative "data/..." paths used in
# this script are resolved against the directory the script is run from.

#### Load data ####

# Read the merged master data set from disk.
merged_data <- readRDS(file = "data/merged_data_master.rds")

### Drop Data #####

# Print a summary of six of the variables that are used for list-wise
# deletion further down, to see their distribution before rows are dropped.
summary(merged_data[["v2exl_legitideol_5"]])
summary(merged_data[["v2x_clphy_5"]])
summary(merged_data[["s_far_Maddison_gdppc_1990_estim_5"]])
summary(merged_data[["s_far_Maddison_pop_estimate_5"]])
summary(merged_data[["edu_ineq_5"]])
summary(merged_data[["health_ineq_5"]])

# Mean of `v` that ignores missing values.
#
# Args:
#   v: vector to average.
# Returns:
#   mean(v, na.rm = TRUE), or NA when `v` holds no non-missing value
#   (this includes a zero-length `v`), instead of the NaN that
#   mean(..., na.rm = TRUE) would produce in that case.
mean.new <- function(v) {
  # Guard first: an all-NA (or empty) input has no mean to report.
  if (all(is.na(v))) {
    return(NA)
  }
  # Spell out TRUE: `T` is an ordinary variable and can be reassigned,
  # which would silently switch NA removal off.
  mean(v, na.rm = TRUE)
}

# For every combination of data, wave, year and iso3n: the mean of
# e_migdppcln and of v2x_polyarchy. A cell whose values are all missing
# gets NA (see mean.new), which makes gaps in the macro data visible.
overview_na_macro_data <- summarize(
  group_by(merged_data, data, wave, year, iso3n),
  mean_e_migdppcln = mean.new(e_migdppcln),
  mean_v2x_polyarchy = mean.new(v2x_polyarchy)
)


#### Drop NAs in the macro variables and in income_equality ####

# List-wise deletion: a row is kept only if none of the variables listed
# below is missing. income_equality is included in the same call; dropping
# on it in a second step would give the same rows.
merged_data_full <- drop_na(
  merged_data,
  edu_ineq_5,
  health_ineq_5,
  v2x_polyarchy,
  autocracy,
  e_migdppcln,
  s_far_Maddison_pop_estimate_5,
  s_far_Maddison_gdppc_1990_estim_5,
  v2x_clphy_5,
  v2exl_legitideol_5,
  autocracy_5,
  income_equality
)

#### Delete early birth cohorts ####

# Print the number of rows per value of cohort_5.x.
# NOTE(review): the original heading spoke of cohorts "with less than 1000
# persons"; the filter below uses a fixed cutoff instead, presumably chosen
# from this table -- confirm.
count(group_by(merged_data_full, cohort_5.x))

# Keep only rows whose birth cohort (cohort_5.x) is 1910 or later.
merged_data_full <- filter(merged_data_full, cohort_5.x >= 1910)


#### Keep countries whose observations span at least 10 years ####

# num_years is the distance between a country's first and last observed
# year (last minus first), not the number of distinct years observed.
merged_data_full2 <- merged_data_full %>%
  group_by(country_name) %>%
  mutate(num_years = diff(range(year))) %>%
  filter(num_years >= 10) %>%
  ungroup()

# Distribution of the span over the remaining rows.
table(merged_data_full2[["num_years"]])

#### Extract countries with at least three surveys #####

# One row per country-year present in the data; every distinct year in
# which a country appears is treated as one survey.
num_surveys <- merged_data_full2 %>%
  ungroup() %>%
  distinct(country_name, year) %>%
  select(country_name)

# Countries that are dropped by hand even if they have enough surveys.
excluded_countries <- c("Singapore", "Algeria", "Montenegro", "Zimbabwe")

# Number of surveys per country (Freq). Counting with dplyr keeps the
# column names explicit instead of relying on the names that
# data.frame(table(...)) happens to assign. Excluded countries and missing
# country names are removed outright rather than being recoded to NA, so
# that no NA ends up in country_list (an NA there would make %in% keep
# rows whose country_name is missing).
counts <- num_surveys %>%
  count(country_name) %>%
  rename(Freq = n) %>%
  filter(
    Freq >= 3,
    !is.na(country_name),
    !country_name %in% excluded_countries
  )

# Names of the countries that stay in the sample.
country_list <- as.character(counts$country_name)

merged_data_full2 <- merged_data_full2 %>%
  ungroup() %>%
  dplyr::filter(country_name %in% country_list)

## drop country-cohort cells with fewer than 10 individuals ##

# Give every row its "<country_name>_<cohortmatch5_15>" key.
merged_data_full2 <- mutate(
  ungroup(merged_data_full2),
  country_cohort = str_c(country_name, cohortmatch5_15, sep = "_")
)

class(merged_data_full2$country_cohort)

# Rows per cohort-country cell, restricted to cells with at least 10 rows.
# The (misspelled) column name indidivuals_per_country is kept as is because
# this table stays in the workspace.
count_cohort_countries <- merged_data_full2 %>%
  group_by(cohortmatch5_15, country_name) %>%
  count() %>%
  rename(indidivuals_per_country = n) %>%
  mutate(country_cohort = str_c(country_name, cohortmatch5_15, sep = "_")) %>%
  dplyr::filter(indidivuals_per_country >= 10)

# Keys of the country-cohort cells with enough data.
country_cohort_list <- count_cohort_countries$country_cohort

# Keep only rows that belong to one of those cells.
merged_data_full2 <- dplyr::filter(
  ungroup(merged_data_full2),
  country_cohort %in% country_cohort_list
)

#### Delete Germany, problems with merging socialization variables ####

# Remove every row with country_name "Germany". filter() keeps only rows
# where the condition is TRUE, so rows with a missing country_name are
# dropped here as well.
merged_data_full2 <- dplyr::filter(
  ungroup(merged_data_full2),
  country_name != "Germany"
)


# Overview per country-year: highest wave value, number of rows and highest
# v2x_regime value found in that country-year.
num_waves <- summarise(
  group_by(merged_data_full2, country_name, year),
  wave = max(wave),
  number_i = n(),
  v2x_regime = max(v2x_regime)
)

# Replace the numeric regime code by its label; any other value becomes NA.
num_waves <- mutate(
  num_waves,
  v2x_regime = case_when(
    v2x_regime == 0 ~ "Closed Autocracy",
    v2x_regime == 1 ~ "Electoral Autocracy",
    v2x_regime == 2 ~ "Electoral Democracy",
    v2x_regime == 3 ~ "Liberal Democracy"
  )
)

# Print the overview as a table, row by row (no summary statistics).
# NOTE(review): num_waves is still grouped by country_name at this point;
# confirm stargazer renders it as intended.
library(stargazer)
stargazer(num_waves, summary = FALSE)

# Number of rows per country, printed the same way.
num_countries <- count(group_by(merged_data_full2, country_name))

stargazer(num_countries, summary = FALSE)


# Two cohort columns (cohort_5.x / cohort_5.y) are present: keep the .x
# version under the plain name cohort_5 and discard the .y version.
merged_data_full2 <- merged_data_full2 %>%
  rename(cohort_5 = cohort_5.x) %>%
  select(-cohort_5.y)



#### Multiple Imputation with Amelia ####

# library() instead of require(): stop right here if a package is missing
# rather than failing later with "could not find function".
library(Amelia)
library(foreign) # presumably needed for the Stata (.dta) export below

# Running row number as an identifier for every row.
merged_data_full2 <- merged_data_full2 %>%
  mutate(individual_id = row_number())

# Inspect four of the variables that are imputed below.
summary(merged_data_full2$sex)
summary(merged_data_full2$unemployed)
summary(merged_data_full2$education_3)
summary(merged_data_full2$income_deciles)

# Variables handed to amelia() as idvars: they are left out of the
# imputation model but are kept in the imputed data sets.
mi_id_vars <- c(
  "edu_ineq_5", "health_ineq_5", "v2x_polyarchy", "autocracy", "e_migdppcln",
  "s_far_Maddison_pop_estimate_5", "s_far_Maddison_gdppc_1990_estim_5",
  "v2x_clphy_5", "v2exl_legitideol_5", "autocracy_5",
  "e_peginiwi", "e_peginiwi_5", "v2x_polyarchy_5",
  "gini_disp", "gini_mkt", "gini_disp_5", "gini_mkt_5",
  "cohortmatch5_15", "cohortmatch5_20", "country_id", "data",
  "individual_id", "income_equality"
)

# Variables declared as ordinal for the imputation.
mi_ordinal_vars <- c("education_3", "unemployed", "income_deciles", "sex")

# Columns that go into the imputation step. Everything that is neither an
# id variable nor the time (year) / cross-section (country_name) index is
# part of the imputation model: unemployed, education_3, sex,
# income_deciles, age, time_under_autocracy, time_under_autocracy_15,
# government_resp and cohort_5.
merged_data_full2_mi <- merged_data_full2 %>%
  select(edu_ineq_5, health_ineq_5, v2x_polyarchy, autocracy, e_migdppcln,
         s_far_Maddison_pop_estimate_5, s_far_Maddison_gdppc_1990_estim_5,
         v2x_clphy_5, v2exl_legitideol_5, autocracy_5,
         e_peginiwi, e_peginiwi_5, v2x_polyarchy_5,
         gini_disp, gini_mkt, gini_disp_5, gini_mkt_5,
         unemployed, education_3, sex, income_deciles, age,
         time_under_autocracy, time_under_autocracy_15,
         government_resp, income_equality,
         cohort_5, cohortmatch5_15, cohortmatch5_20, year, country_name,
         country_id, data, individual_id)

# Convert to a plain data.frame before calling amelia().
merged_data_full2_mi <- as.data.frame(merged_data_full2_mi)

class(merged_data_full2_mi)

# Fix the random number generator so that the imputed data sets can be
# reproduced; without a seed every run yields different imputations.
set.seed(20191023)

# m = 10 imputed data sets, year as time index, country_name as
# cross-section index, no progress output (p2s = 0).
a.merged_data_full2 <- amelia(
  merged_data_full2_mi,
  idvars = mi_id_vars,
  m = 10,
  ords = mi_ordinal_vars,
  ts = "year",
  cs = "country_name",
  p2s = 0
)

summary(a.merged_data_full2)
summary(a.merged_data_full2$imputations[[1]])

# Write the imputed data sets as Stata files. Create the target directory
# first so the export does not fail on a fresh checkout.
mi_file_stem <- "data/income_equality_data/data"
dir.create(dirname(mi_file_stem), recursive = TRUE, showWarnings = FALSE)

write.amelia(obj = a.merged_data_full2, file.stem = mi_file_stem,
             format = "dta")

